/* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "udm_config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#include "udm_common.h"
#include "udm_spell.h"
#include "udm_hrefs.h"
#include "udm_utils.h"
#include "udm_xmalloc.h"
#include "udm_sgml.h"

/* Max URLs in cache: 4K URLs will use about 200K of RAM         */
/* This should be a configurable parameter but we'll use 4K now  */
/* NOTE(review): no 4K cap is visible in this file; the Href     */
/* array simply grows by HSIZE entries whenever it is full.      */
/* Confirm whether a limit is enforced elsewhere.                */

#define HSIZE		256	/* Entries added to the Href array each time it fills up  TUNE */
#define RESORT_HREFS	256	/* Re-sort once the unsorted tail exceeds this many items TUNE */


/* Function to sort URLs in alphabetic order */
static int cmphrefs(const void * v1, const void * v2){
	return(strcmp(((const UDM_HREF*)v1)->url,((const UDM_HREF*)v2)->url));
}


/*
 * Add a link to the list unless the same URL is already present.
 *
 * The URL is copied into a local buffer and normalized with UdmTrim(),
 * UdmStrRemoveChars() and UdmSGMLUnescape() before it is compared or
 * stored.  The list keeps its first `shrefs` entries sorted by URL
 * (searched by bisection) followed by an unsorted tail (searched
 * linearly); the whole array is re-sorted once the tail grows past
 * RESORT_HREFS entries.
 *
 * When the URL is already known, the `stored` flags of the existing
 * entry are OR-ed with Href->stored and nothing is appended.
 *
 * The new entry owns private copies of url, tag and category; referrer,
 * hops, method and stored are copied by value.
 *
 * Returns 1 when a new entry was appended, 0 otherwise (missing, empty
 * or over-long URL, duplicate URL, or out of memory).  On an allocation
 * failure the list is left valid and unchanged apart from possibly
 * having a larger capacity.
 */
__INDLIB__ int UdmHrefListAdd(UDM_HREFLIST * HrefList,UDM_HREF * Href){
	size_t lo,hi,mid,i,len;
	int res;
	char ehref[UDM_URLSIZE];
	char *url,*tag,*category;
	UDM_HREF *grown,*slot;

	/* Don't add missing, empty or too long link */
	if(Href->url==NULL)return(0);
	len=strlen(Href->url);
	if((len<1)||(len>UDM_URLSIZE-1))return(0);

	/* len < UDM_URLSIZE, so the string and its terminator fit */
	memcpy(ehref,Href->url,len+1);
	UdmTrim(ehref," \t\r\n");
	UdmStrRemoveChars(ehref,"\t\r\n");
	UdmSGMLUnescape(ehref);

	/* Normalization may leave nothing behind (e.g. a URL made only */
	/* of characters passed to UdmTrim); such a link is empty too   */
	if(ehref[0]=='\0')return(0);

	/* Find current URL in sorted part of list: bisect [lo,hi) */
	lo=0;hi=HrefList->shrefs;
	while(lo<hi){
		mid=lo+(hi-lo)/2;
		res=strcmp(HrefList->Href[mid].url,ehref);
		if(res==0){
			HrefList->Href[mid].stored|=Href->stored;
			return(0);
		}
		if(res<0)
			lo=mid+1;
		else
			hi=mid;
	}
	/* Find in unsorted part */
	for(i=HrefList->shrefs;i<HrefList->nhrefs;i++){
		if(!strcmp(HrefList->Href[i].url,ehref)){
			HrefList->Href[i].stored|=Href->stored;
			return(0);
		}
	}

	/* Grow the array by HSIZE entries when it is full.  Keep the  */
	/* old pointer and capacity until realloc() is known to have   */
	/* succeeded, so a failure neither leaks nor corrupts the list */
	if(HrefList->nhrefs>=HrefList->mhrefs){
		grown=(UDM_HREF *)realloc(HrefList->Href,(HrefList->mhrefs+HSIZE)*sizeof(UDM_HREF));
		if(grown==NULL)return(0);
		HrefList->Href=grown;
		HrefList->mhrefs+=HSIZE;
	}

	/* Make the private copies first; a NULL url in the list would */
	/* crash later strcmp() calls, so give up if any copy fails    */
	url=strdup(ehref);
	tag=Href->tag?strdup(Href->tag):NULL;
	category=Href->category?strdup(Href->category):NULL;
	if((url==NULL)||(Href->tag&&(tag==NULL))||(Href->category&&(category==NULL))){
		free(url);
		free(tag);
		free(category);
		return(0);
	}

	slot=&HrefList->Href[HrefList->nhrefs];
	slot->url=url;
	slot->referrer=Href->referrer;
	slot->hops=Href->hops;
	slot->method=Href->method;
	slot->stored=Href->stored;
	slot->tag=tag;
	slot->category=category;
	HrefList->nhrefs++;

	/* Sort unsorted part */
	if((HrefList->nhrefs-HrefList->shrefs)>RESORT_HREFS){
		qsort(HrefList->Href,HrefList->nhrefs,sizeof(UDM_HREF),cmphrefs);
		/* Remember count of sorted URLs  */
		HrefList->shrefs=HrefList->nhrefs;
		/* Count of stored URLs became 0  */
		HrefList->dhrefs=0;
	}
	return(1);
}
/*
 * Release the contents of a href list: the url, tag and category of
 * each of the first nhrefs entries, then the entry array itself.
 * The list structure is zero-filled afterwards; the structure itself
 * is not freed.
 */
extern __INDLIB__ void UdmHrefListFree(UDM_HREFLIST * HrefList){
	size_t idx=0;
	UDM_HREF *entry;

	while(idx<HrefList->nhrefs){
		entry=&HrefList->Href[idx];
		UDM_FREE(entry->url);
		UDM_FREE(entry->tag);
		UDM_FREE(entry->category);
		idx++;
	}
	UDM_FREE(HrefList->Href);
	memset(HrefList,0,sizeof(*HrefList));
}
/* Zero-fill a caller-provided list structure and return the same pointer */
__INDLIB__ UDM_HREFLIST * UdmHrefListInit(UDM_HREFLIST * Hrefs){
	memset(Hrefs,0,sizeof(*Hrefs));
	return(Hrefs);
}
